package MR_process.MR1;

import java.io.IOException;
import java.util.Locale;
import java.util.regex.Pattern;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that emits {@code ("<docId> <word>", 1)} for every purely alphabetic word
 * found in an input line.
 *
 * <p>Each input line is split at its FIRST comma: the text before it is used as the
 * document id and everything after it as the document content. Commas inside the
 * content are treated as token separators (like whitespace), so text after a second
 * comma is no longer discarded.
 *
 * <p>Lines that contain no comma at all are skipped and counted under the
 * {@code Map1 / MALFORMED_LINES} counter instead of failing the task.
 */
public class Map1 extends Mapper<LongWritable, Text, Text, LongWritable> {

    /** Counter group and name used to report lines that have no id/content separator. */
    private static final String COUNTER_GROUP = "Map1";
    private static final String MALFORMED_LINES = "MALFORMED_LINES";

    /** Accepts tokens made only of ASCII letters; compiled once instead of per word. */
    private static final Pattern LETTERS_ONLY = Pattern.compile("[a-zA-Z]+");

    /** Tokens are separated by runs of whitespace and/or commas. */
    private static final Pattern TOKEN_SEPARATOR = Pattern.compile("[\\s,]+");

    /** Constant count emitted with every key; never mutated, so it is safe to share. */
    private static final LongWritable ONE = new LongWritable(1);

    /** Output key buffer reused across writes to avoid one allocation per emitted pair. */
    private final Text outKey = new Text();

    /**
     * Tokenizes one input line and writes a count of 1 for each alphabetic word.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line of input, expected in the form {@code id,content}
     * @param context task context that receives the {@code ("<docId> <word>", 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();

        // Split only at the first comma so commas inside the content do not truncate it.
        int separator = line.indexOf(',');
        if (separator < 0) {
            // No id/content separator (e.g. a blank line): skip it, but keep it visible.
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }

        String docId = line.substring(0, separator);
        String docContent = line.substring(separator + 1);

        for (String token : TOKEN_SEPARATOR.split(docContent)) {
            // Empty tokens (leading separator) and tokens with digits/punctuation are dropped.
            if (!LETTERS_ONLY.matcher(token).matches()) {
                continue;
            }
            // Locale.ROOT keeps lowercasing independent of the JVM's default locale.
            outKey.set(docId + " " + token.toLowerCase(Locale.ROOT));
            context.write(outKey, ONE);
        }
    }
}
